We have been using fully connected networks (FCNs) to classify the MNIST dataset, and in the last assignment we designed a network which could do this with an accuracy of around 98%.
Convolutional Neural Networks, or Convnets, or CNNs, are another even more powerful tool for classifying images such as MNIST. You might ask, what do Convnets do that FCNs can't?
To understand this, let's directly compare FCNs and CNNs on the task of classifying MNIST data.
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
print(keras.__version__)

from keras.datasets import mnist

# Load MNIST; optionally truncate to a smaller subset so training runs quickly.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

short = True
if short:
    train_images, train_labels = train_images[:15000, :], train_labels[:15000]
    test_images, test_labels = test_images[:3000, :], test_labels[:3000]

print("Train info", train_images.shape, train_labels.shape)
print("Test info", test_images.shape, test_labels.shape)

# Add an explicit channel axis (needed by Conv2D) and scale pixels into [0, 1].
train_images = train_images.reshape((train_images.shape[0], 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((test_images.shape[0], 28, 28, 1)).astype('float32') / 255

# One-hot encode the integer labels for categorical cross-entropy training.
train_labels_cat = keras.utils.to_categorical(train_labels)
test_labels_cat = keras.utils.to_categorical(test_labels)
2.6.0 2.6.0 Train info (15000, 28, 28) (15000,) Test info (3000, 28, 28) (3000,)
We will build a simple 1-hidden-layer network. We will use 400 hidden nodes since that was close to optimal based on our earlier studies.
NOTE: We will then save the network for later use.
from keras import models
from keras import layers

# One-hidden-layer fully connected network: flatten -> 400 tanh -> 10 softmax.
# 400 hidden nodes was close to optimal in our earlier studies.
fcn_network = models.Sequential()
fcn_network.add(layers.Flatten(input_shape=[28, 28]))
fcn_network.add(layers.Dense(400, activation='tanh'))
fcn_network.add(layers.Dense(10, activation='softmax'))

fcn_network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train, then persist the fitted model so it can be reloaded later.
history = fcn_network.fit(train_images, train_labels_cat, epochs=15, batch_size=128,
                          validation_data=(test_images, test_labels_cat))
fcn_network.save('fully_trained_model_fcn.h5')
Epoch 1/15 118/118 [==============================] - 1s 7ms/step - loss: 0.5063 - accuracy: 0.8547 - val_loss: 0.4003 - val_accuracy: 0.8800 Epoch 2/15 118/118 [==============================] - 1s 5ms/step - loss: 0.2880 - accuracy: 0.9171 - val_loss: 0.3499 - val_accuracy: 0.8963 Epoch 3/15 118/118 [==============================] - 1s 6ms/step - loss: 0.2361 - accuracy: 0.9336 - val_loss: 0.3144 - val_accuracy: 0.9047 Epoch 4/15 118/118 [==============================] - 1s 6ms/step - loss: 0.1939 - accuracy: 0.9453 - val_loss: 0.3189 - val_accuracy: 0.9047 Epoch 5/15 118/118 [==============================] - 1s 5ms/step - loss: 0.1699 - accuracy: 0.9510 - val_loss: 0.2713 - val_accuracy: 0.9183 Epoch 6/15 118/118 [==============================] - 1s 5ms/step - loss: 0.1371 - accuracy: 0.9608 - val_loss: 0.2489 - val_accuracy: 0.9200 Epoch 7/15 118/118 [==============================] - 1s 6ms/step - loss: 0.1153 - accuracy: 0.9678 - val_loss: 0.2341 - val_accuracy: 0.9247 Epoch 8/15 118/118 [==============================] - 1s 5ms/step - loss: 0.0981 - accuracy: 0.9733 - val_loss: 0.2247 - val_accuracy: 0.9310 Epoch 9/15 118/118 [==============================] - 1s 7ms/step - loss: 0.0837 - accuracy: 0.9788 - val_loss: 0.2136 - val_accuracy: 0.9367 Epoch 10/15 118/118 [==============================] - 1s 5ms/step - loss: 0.0727 - accuracy: 0.9813 - val_loss: 0.2067 - val_accuracy: 0.9340 Epoch 11/15 118/118 [==============================] - 1s 6ms/step - loss: 0.0614 - accuracy: 0.9850 - val_loss: 0.2012 - val_accuracy: 0.9380 Epoch 12/15 118/118 [==============================] - 1s 5ms/step - loss: 0.0499 - accuracy: 0.9877 - val_loss: 0.1927 - val_accuracy: 0.9397 Epoch 13/15 118/118 [==============================] - 1s 6ms/step - loss: 0.0418 - accuracy: 0.9911 - val_loss: 0.1887 - val_accuracy: 0.9443 Epoch 14/15 118/118 [==============================] - 1s 5ms/step - loss: 0.0377 - accuracy: 0.9918 - val_loss: 0.1872 - val_accuracy: 0.9430 Epoch 
15/15 118/118 [==============================] - 1s 5ms/step - loss: 0.0315 - accuracy: 0.9938 - val_loss: 0.1809 - val_accuracy: 0.9440
Let's try to build a CNN to classify MNIST images.
This is based on the network in the notebook: how_cnn_works.ipynb in this directory.
NOTE: We will then save the network for later use.
from keras import models
from keras import layers

# Small CNN: two conv+pool stages feeding a dense classifier head.
cnn_network = models.Sequential()

# Conv stage 1: 30 filters of 5x5 over the 28x28x1 input
cnn_network.add(layers.Conv2D(30, (5, 5), activation='relu', input_shape=(28, 28, 1)))
cnn_network.add(layers.MaxPooling2D((2, 2)))

# Conv stage 2: 25 filters of 5x5
cnn_network.add(layers.Conv2D(25, (5, 5), activation='relu'))
cnn_network.add(layers.MaxPooling2D((2, 2)))

# (A third conv layer was tried and left disabled.)
#cnn_network.add(layers.Conv2D(32,(3,3),activation='relu'))

# Dense classifier head - just like an FCN
cnn_network.add(layers.Flatten())
cnn_network.add(layers.Dense(64, activation='relu'))
cnn_network.add(layers.Dense(10, activation='softmax'))

cnn_network.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Train, then persist the fitted model so it can be reloaded later.
history = cnn_network.fit(train_images, train_labels_cat, epochs=5, batch_size=256,
                          validation_data=(test_images, test_labels_cat))
cnn_network.save('fully_trained_model_cnn.h5')
Epoch 1/5 59/59 [==============================] - 11s 167ms/step - loss: 0.8563 - accuracy: 0.7383 - val_loss: 0.4157 - val_accuracy: 0.8783 Epoch 2/5 59/59 [==============================] - 8s 136ms/step - loss: 0.2652 - accuracy: 0.9203 - val_loss: 0.2363 - val_accuracy: 0.9337 Epoch 3/5 59/59 [==============================] - 8s 136ms/step - loss: 0.1714 - accuracy: 0.9473 - val_loss: 0.2183 - val_accuracy: 0.9330 Epoch 4/5 59/59 [==============================] - 8s 136ms/step - loss: 0.1278 - accuracy: 0.9597 - val_loss: 0.1549 - val_accuracy: 0.9517 Epoch 5/5 59/59 [==============================] - 8s 141ms/step - loss: 0.0964 - accuracy: 0.9715 - val_loss: 0.2078 - val_accuracy: 0.9367
The following method will be helpful later to get loss, accuracy, and the confusion matrix for our network.
We can use this for both the FCN as well as the CNN.
import numpy as np
#
# Used to implement the multi-dimensional counter we need in the performance class
from collections import defaultdict
from functools import partial
from itertools import repeat
def nested_defaultdict(default_factory, depth=1):
    """Build a defaultdict nested `depth` levels deep.

    Missing keys at intermediate levels auto-create another defaultdict;
    missing keys at the innermost level call `default_factory`.
    Used below as a multi-dimensional counter for confusion matrices.
    """
    factory = partial(defaultdict, default_factory)
    for _ in range(depth - 1):
        factory = partial(defaultdict, factory)
    return factory()
#
def getPerformance(network, images, labels_cat, labels):
    """Evaluate a trained network and summarize its per-class behavior.

    Parameters:
        network: a compiled, trained Keras model.
        images: input array in the layout the network accepts.
        labels_cat: one-hot labels matching `images`, used by evaluate().
        labels: integer class labels, used to build the confusion matrix.

    Returns:
        (loss, acc, cf) where cf is a two-level nested dict such that
        cf[true_label][predicted_class] is a sample count.
    """
    # Overall loss/accuracy on the provided sample
    loss, acc = network.evaluate(images, labels_cat)

    # Per-sample class-probability rows, one row per input sample
    predictions = network.predict(images)

    # Predicted class = index of the highest probability in each row.
    # (The max probability itself was computed here before but never used.)
    classes = np.argmax(predictions, axis=1)

    # Confusion matrix as a two-level defaultdict counter
    cf = nested_defaultdict(int, 2)
    for label, cl in zip(labels, classes):
        cf[label][cl] += 1

    return loss, acc, cf
Since we trained the networks in this same notebook, we don't really need to do this step, but this shows you how to do it.
from keras.models import load_model

# Reload both saved models from disk. Since we trained them in this same
# notebook this step is redundant, but it demonstrates how to do it.
network_name = 'fully_trained_model_fcn.h5'
fcn_network = load_model(network_name)

network_name = 'fully_trained_model_cnn.h5'
cnn_network = load_model(network_name)
We will use our "getPerformance" method. Are the networks similar?
def _print_confusion(cf):
    # Render the 10x10 confusion matrix, one true class per row.
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", cf[trueClass][predClass], end="")
        print()
    print()

# FCN performance on the (untransformed) test set
loss, acc, cf = getPerformance(fcn_network, test_images, test_labels_cat, test_labels)
print(" Results for FCN:")
print(" Loss", round(loss, 4), "; accuracy on test set:", round(acc, 4))
print(" confusion matrix:")
_print_confusion(cf)

# CNN performance on the same test set
loss, acc, cf = getPerformance(cnn_network, test_images, test_labels_cat, test_labels)
print(" Results for CNN:")
print(" Loss", round(loss, 4), "; accuracy on test set:", round(acc, 4))
print(" confusion matrix:")
_print_confusion(cf)
94/94 [==============================] - 0s 1ms/step - loss: 0.1809 - accuracy: 0.9440 Results for FCN: Loss 0.1809 ; accuracy on test set: 0.944 confusion matrix: True: 0 262 0 1 0 0 2 5 0 1 0 True: 1 0 334 2 0 0 0 2 1 1 0 True: 2 3 1 295 1 1 1 2 3 4 2 True: 3 0 0 3 298 0 8 0 5 1 1 True: 4 0 0 3 0 303 0 3 1 1 7 True: 5 1 0 1 4 2 269 1 1 3 1 True: 6 3 3 2 0 3 3 254 1 3 0 True: 7 0 3 6 3 3 0 0 285 1 5 True: 8 4 0 2 6 2 0 1 2 267 2 True: 9 2 2 1 6 11 3 0 3 2 265 94/94 [==============================] - 1s 5ms/step - loss: 0.2078 - accuracy: 0.9367 Results for FCN: Loss 0.2078 ; accuracy on test set: 0.9367 confusion matrix: True: 0 266 0 0 0 0 0 5 0 0 0 True: 1 0 339 1 0 0 0 0 0 0 0 True: 2 4 2 304 0 2 0 0 1 0 0 True: 3 1 9 17 239 0 38 0 5 3 4 True: 4 0 2 1 0 313 0 1 0 0 1 True: 5 0 0 0 0 2 278 2 1 0 0 True: 6 2 3 0 0 1 2 264 0 0 0 True: 7 1 6 11 0 1 0 0 284 0 3 True: 8 10 9 7 0 4 1 1 5 247 2 True: 9 1 5 0 0 10 2 1 0 0 276
Keras gives us a tool to get summary information about our network:
print()
print("FCN:")
print(fcn_network.summary())
print()
print("CNN:")
print(cnn_network.summary())
FCN: Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten (Flatten) (None, 784) 0 _________________________________________________________________ dense (Dense) (None, 400) 314000 _________________________________________________________________ dense_1 (Dense) (None, 10) 4010 ================================================================= Total params: 318,010 Trainable params: 318,010 Non-trainable params: 0 _________________________________________________________________ None CNN: Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 24, 24, 30) 780 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 12, 12, 30) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 8, 8, 25) 18775 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 4, 4, 25) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 400) 0 _________________________________________________________________ dense_2 (Dense) (None, 64) 25664 _________________________________________________________________ dense_3 (Dense) (None, 10) 650 ================================================================= Total params: 45,869 Trainable params: 45,869 Non-trainable params: 0 _________________________________________________________________ None
Notice that the two layers are called dense: these are fully connected layers, meaning there is a connection from every input to every output. Here is how we get the parameters:
So we have 318,010 total parameters for a network which is used to classify small 28x28 greyscale images. If we went to megapixel color images, we would have 3x1000x1000 = 3,000,000 input pixels, and if we have a 400 node hidden layer (which is probably too small), we end up with more than 1.2 billion parameters.... this does not scale!
If we have a 1000x1000 greyscale image, this is a megapixel image (1000x1000=1,000,000 pixels). If this is a color image, then each pixel has 3 possible values (corresponding to RGB - red,green,blue), for a total of 3,000,000 pixels. If we were to classify color images of this size into digits (10 classes total) with a single hidden layer of 400 nodes, how many parameters are needed?
3 million pixels means we have an input of 3 million
3 million * 400 hidden nodes
Each hidden node has a bias node = 400
400 hidden nodes * 10 classes/outputs
Each output node has a bias node = 10
hiddennodes = 400
pixel_inputs = 3000000
outputnodes = 10
print(f'3 million inputs connected to 400 hidden nodes: {pixel_inputs*hiddennodes} parameters')
print(f'Each hidden node has a bias node: {hiddennodes} parameters')
print(f'Each hidden node is connected to the 10 output nodes: {outputnodes*hiddennodes} parameters')
print(f'Each output node has a bias node: {outputnodes} parameters')
print()
print(f'Total parameters: {pixel_inputs*hiddennodes+hiddennodes+outputnodes*hiddennodes+outputnodes}')
3 million inputs connected to 400 hidden nodes: 1200000000 parameters Each hidden node has a bias node: 400 parameters Each hidden node is connected to the 10 output nodes: 4000 parameters Each output node has a bias node: 10 parameters Total parameters: 1200004410
The types of variations I want us to consider include:
Let's define a method to do this, using a single image as an input, and also define a method to display the image:
import keras.preprocessing.image as kpi
import matplotlib.pyplot as plt
import numpy as np
# Augmentation configuration. Only apply_transform() is used below, so the
# featurewise-statistics options never have their statistics fit here.
data_gen_args = dict(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=90,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
)
image_datagen = kpi.ImageDataGenerator(**data_gen_args)
#
#
#Inputs:
# tx = shift left or right (0 is no shift)
# ty = shift up or down (0 is no shift)
# zoom = image smaller or larger (1.0 is no zoom)
# theta = rotate about axis through center of image (0 is no rotation)
def transform_image(img, tx=0, ty=0, zoom=1.0, rotation=0.0, shear=0.0):
    """Apply an affine transform to a single 28x28 image and return the result.

    tx/ty  : shift in pixels (0 = no shift)
    zoom   : scale factor, applied to both axes (1.0 = no zoom)
    rotation: rotation in degrees about the image center (0 = none)
    shear  : shear angle in degrees (0 = none)
    """
    params = {
        'theta': rotation,
        'zx': zoom,
        'zy': zoom,
        'tx': tx,
        'ty': ty,
        'shear': shear,
    }
    # Work on a copy so the caller's image is never modified in place.
    pixels = np.array(img, copy=True).reshape(28, 28, 1)
    return image_datagen.apply_transform(x=pixels, transform_parameters=params)
def plot_image(img):
    """Display a single 28x28 image as a heat map with a colorbar."""
    plt.imshow(img.reshape(28, 28), cmap='hot')
    plt.colorbar()
    plt.show()
# Item 1: an untransformed sample from the test set
image = test_images[421, :, :, :]
plot_image(image)

# Item 2: rotated 45 degrees
trans_image = transform_image(image, rotation=45)
plot_image(trans_image)

# Item 3: rotated 45 degrees and zoomed 1.5x
trans_image = transform_image(image, rotation=45, zoom=1.5)
plot_image(trans_image)

# Item 4: rotated, zoomed, and shifted right 10 / up 6 pixels
trans_image = transform_image(image, rotation=45, zoom=1.5, tx=10, ty=-6)
plot_image(trans_image)
We are now ready to systematically answer the question: how well do the FCN and the CNN handle images that are slight (or not-so-slight) variations of the data they were trained on?
Here is what we will do:
When we are done, we will run that list of images through our original FCN and CNN and note the performance, comparing it to the default.
import random
import numpy as np

fcn_shift_acc = []
cnn_shift_acc = []
shifts = [0, 1, 2, 3, 4]

for shift in shifts:
    print()
    print("Shift ", shift)

    # Shift every test image by +/- shift pixels in x and y, each sign chosen at random.
    imgList = []
    for img in test_images[:]:
        tx = shift if random.uniform(0.0, 1.0) > 0.5 else -shift
        ty = shift if random.uniform(0.0, 1.0) > 0.5 else -shift
        imgList.append(transform_image(img, tx=tx, ty=ty, zoom=1.0, rotation=0.0, shear=0.0))

    # Convert to an np array in the shape the networks expect
    npa_images = np.asarray(imgList, dtype=np.float32)

    # Run all of the shifted images through the previously trained FCN
    smear_loss, smear_acc, smear_cf = getPerformance(fcn_network, npa_images, test_labels_cat, test_labels)
    print()
    print(" FCN Results")
    print(" Loss,acc", smear_loss, smear_acc)

    # Store the accuracy for this shift, then print the confusion matrix
    fcn_shift_acc.append(smear_acc)
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", smear_cf[trueClass][predClass], end="")
        print()

    # Run the same shifted images through the previously trained CNN
    smear_loss, smear_acc, smear_cf = getPerformance(cnn_network, npa_images, test_labels_cat, test_labels)
    print()
    print(" CNN Results")
    print(" Loss,acc", smear_loss, smear_acc)

    # Store the accuracy for this shift, then print the confusion matrix
    cnn_shift_acc.append(smear_acc)
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", smear_cf[trueClass][predClass], end="")
        print()
Shift 0 94/94 [==============================] - 0s 1ms/step - loss: 0.1809 - accuracy: 0.9440 FCN Results Loss,acc 0.18091194331645966 0.9440000057220459 True: 0 262 0 1 0 0 2 5 0 1 0 True: 1 0 334 2 0 0 0 2 1 1 0 True: 2 3 1 295 1 1 1 2 3 4 2 True: 3 0 0 3 298 0 8 0 5 1 1 True: 4 0 0 3 0 303 0 3 1 1 7 True: 5 1 0 1 4 2 269 1 1 3 1 True: 6 3 3 2 0 3 3 254 1 3 0 True: 7 0 3 6 3 3 0 0 285 1 5 True: 8 4 0 2 6 2 0 1 2 267 2 True: 9 2 2 1 6 11 3 0 3 2 265 94/94 [==============================] - 0s 4ms/step - loss: 0.2078 - accuracy: 0.9367 CNN Results Loss,acc 0.20780420303344727 0.9366666674613953 True: 0 266 0 0 0 0 0 5 0 0 0 True: 1 0 339 1 0 0 0 0 0 0 0 True: 2 4 2 304 0 2 0 0 1 0 0 True: 3 1 9 17 239 0 38 0 5 3 4 True: 4 0 2 1 0 313 0 1 0 0 1 True: 5 0 0 0 0 2 278 2 1 0 0 True: 6 2 3 0 0 1 2 264 0 0 0 True: 7 1 6 11 0 1 0 0 284 0 3 True: 8 10 9 7 0 4 1 1 5 247 2 True: 9 1 5 0 0 10 2 1 0 0 276 Shift 1 94/94 [==============================] - 0s 1ms/step - loss: 0.7240 - accuracy: 0.8033 FCN Results Loss,acc 0.723962664604187 0.8033333420753479 True: 0 199 0 11 12 2 21 12 3 7 4 True: 1 2 233 1 23 1 8 31 21 20 0 True: 2 3 3 276 9 1 3 8 5 4 1 True: 3 1 1 10 273 0 13 1 12 4 1 True: 4 0 1 5 5 265 4 11 3 9 15 True: 5 6 0 0 26 3 232 3 5 5 3 True: 6 4 4 4 3 4 9 238 0 6 0 True: 7 1 4 5 13 6 3 0 264 3 7 True: 8 3 0 5 38 1 13 7 3 210 6 True: 9 1 1 0 14 18 3 1 24 13 220 94/94 [==============================] - 0s 4ms/step - loss: 0.3786 - accuracy: 0.8893 CNN Results Loss,acc 0.3786417245864868 0.8893333077430725 True: 0 250 1 1 0 5 0 8 0 1 5 True: 1 0 339 0 0 0 1 0 0 0 0 True: 2 3 6 298 0 1 0 1 3 0 1 True: 3 1 9 21 228 1 39 0 9 2 6 True: 4 0 5 2 0 306 0 3 0 0 2 True: 5 6 0 1 0 1 270 3 1 0 1 True: 6 4 7 0 0 6 2 252 0 1 0 True: 7 1 19 15 0 4 1 0 259 1 6 True: 8 10 16 7 1 4 8 7 7 212 14 True: 9 2 11 0 0 19 5 0 2 2 254 Shift 2 94/94 [==============================] - 0s 1ms/step - loss: 3.7489 - accuracy: 0.4230 FCN Results Loss,acc 3.7489423751831055 0.4230000078678131 True: 0 
39 0 49 58 1 57 38 6 19 4 True: 1 8 37 6 94 2 21 89 58 18 7 True: 2 3 3 202 49 5 4 32 7 7 1 True: 3 2 1 51 206 0 15 14 8 5 14 True: 4 0 1 20 38 150 25 19 17 18 30 True: 5 8 0 18 55 2 144 10 22 13 11 True: 6 5 6 49 30 6 24 138 5 9 0 True: 7 1 1 26 50 8 7 4 170 24 15 True: 8 2 1 35 77 1 39 15 13 73 30 True: 9 0 2 3 48 35 19 1 42 35 110 94/94 [==============================] - 0s 4ms/step - loss: 1.1693 - accuracy: 0.6750 CNN Results Loss,acc 1.1693161725997925 0.675000011920929 True: 0 148 6 11 0 13 12 41 1 1 38 True: 1 0 310 3 0 2 1 14 7 3 0 True: 2 5 12 277 1 7 2 0 6 0 3 True: 3 2 19 66 151 4 53 0 12 1 8 True: 4 1 35 8 0 247 0 20 2 2 3 True: 5 7 0 6 2 1 244 8 6 3 6 True: 6 6 32 1 0 44 5 173 1 3 7 True: 7 2 56 32 0 1 11 1 194 0 9 True: 8 7 31 21 1 10 13 28 15 128 32 True: 9 10 58 5 1 40 6 3 12 7 153 Shift 3 94/94 [==============================] - 0s 1ms/step - loss: 8.7090 - accuracy: 0.1493 FCN Results Loss,acc 8.70899772644043 0.14933332800865173 True: 0 1 0 83 71 0 45 33 9 20 9 True: 1 12 7 5 90 9 37 112 58 4 6 True: 2 1 0 83 83 4 22 87 18 6 9 True: 3 4 0 72 116 0 9 42 26 7 40 True: 4 2 1 39 94 27 30 46 28 42 9 True: 5 8 0 50 59 2 62 34 35 9 24 True: 6 7 6 71 49 13 47 55 5 18 1 True: 7 7 0 25 86 20 25 13 63 42 25 True: 8 0 0 32 109 0 34 40 23 18 30 True: 9 2 0 20 101 12 47 28 26 43 16 94/94 [==============================] - 0s 4ms/step - loss: 2.9385 - accuracy: 0.3320 CNN Results Loss,acc 2.938509225845337 0.3319999873638153 True: 0 40 40 44 1 27 14 49 14 1 41 True: 1 5 117 4 0 44 2 100 55 9 4 True: 2 12 30 197 2 21 12 6 29 0 4 True: 3 11 28 117 53 4 56 2 16 5 24 True: 4 2 99 15 0 143 5 40 10 0 4 True: 5 4 9 42 1 4 190 22 2 2 7 True: 6 17 63 3 0 70 13 93 1 2 10 True: 7 4 86 58 5 10 36 9 88 6 4 True: 8 8 53 39 2 13 20 45 39 43 24 True: 9 4 104 17 3 40 13 63 14 5 32 Shift 4 94/94 [==============================] - 0s 1ms/step - loss: 12.0978 - accuracy: 0.0670 FCN Results Loss,acc 12.097786903381348 0.06700000166893005 True: 0 1 0 77 70 0 47 37 15 5 19 True: 1 
47 5 2 83 12 14 100 52 1 24 True: 2 0 1 29 85 8 21 113 30 11 15 True: 3 11 0 55 54 0 12 54 53 13 64 True: 4 11 1 52 84 2 44 63 25 33 3 True: 5 5 0 53 50 2 33 44 47 8 41 True: 6 12 0 57 61 3 47 51 18 13 10 True: 7 6 0 40 91 15 59 41 14 19 21 True: 8 6 0 28 94 1 22 65 25 9 36 True: 9 11 0 52 99 1 48 58 4 19 3 94/94 [==============================] - 0s 4ms/step - loss: 4.7570 - accuracy: 0.1517 CNN Results Loss,acc 4.7570390701293945 0.15166667103767395 True: 0 8 87 47 5 22 12 61 18 3 8 True: 1 21 46 3 0 116 1 70 70 1 12 True: 2 8 56 110 1 73 32 7 21 2 3 True: 3 11 17 172 21 10 32 5 31 5 12 True: 4 1 144 39 0 48 8 37 24 7 10 True: 5 4 15 80 5 8 135 20 12 0 4 True: 6 21 74 12 0 81 15 40 1 2 26 True: 7 6 65 96 6 13 47 43 27 1 2 True: 8 1 43 83 3 30 35 49 30 6 6 True: 9 4 110 27 2 11 30 60 24 13 14
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'notebook'
import pandas as pd

# Tabulate accuracy vs. shift for both networks
df_shift = pd.DataFrame({
    'Shift': shifts,
    'FCN Accuracy': fcn_shift_acc,
    'CNN Accuracy': cnn_shift_acc,
})
display(df_shift.style)

# Accuracy curves, one trace per network
fig = px.line(df_shift, x='Shift', y=['FCN Accuracy', 'CNN Accuracy'], title='Shift Comparison, FCN vs CNN')
fig.show()
| Shift | FCN Accuracy | CNN Accuracy | |
|---|---|---|---|
| 0 | 0 | 0.944000 | 0.936667 |
| 1 | 1 | 0.807667 | 0.884333 |
| 2 | 2 | 0.417000 | 0.672000 |
| 3 | 3 | 0.155667 | 0.334000 |
| 4 | 4 | 0.069333 | 0.149667 |
We see that standard, fully-connected neural networks, although powerful, have some clear shortcomings when applied to image classification:
Both of these issues are related: the FCN does not take advantage of the fact that - generally - in image classification, the images tend to be built from underlying common features. In the case of MNIST images, these are the curves and lines and corners which make up the individual digits. Convnets attempt to take advantage of these features.
There are a couple of things to notice when comparing the output from the code blocks above:
Using the same basic structure as above, compare how FCNs and CNNs perform on rotations.
Use the starter code below. You will need to randomize whether the rotation is clockwise or counterclockwise.
The output should be just like above for the shifts:
fcn_rot_acc = []
cnn_rot_acc = []
thetas = [0.0, 20.0, 40.0, 60.0, 80.0]

for theta in thetas:
    print()
    print("Rotation ", theta)

    # Rotate every test image by theta degrees, sign (direction) chosen at random.
    imgList = []
    for img in test_images[:]:
        rot = theta if random.uniform(0.0, 1.0) > 0.5 else -theta
        imgList.append(transform_image(img, rotation=rot))

    # Convert to an np array in the shape the networks expect
    npa_images = np.asarray(imgList, dtype=np.float32)

    # Run all of the rotated images through the previously trained FCN
    smear_loss, smear_acc, smear_cf = getPerformance(fcn_network, npa_images, test_labels_cat, test_labels)
    print()
    print(" FCN Results")
    print(" Loss,acc", smear_loss, smear_acc)

    # Store the accuracy for this rotation, then print the confusion matrix
    fcn_rot_acc.append(smear_acc)
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", smear_cf[trueClass][predClass], end="")
        print()

    # Run the same rotated images through the previously trained CNN
    smear_loss, smear_acc, smear_cf = getPerformance(cnn_network, npa_images, test_labels_cat, test_labels)
    print()
    print(" CNN Results")
    print(" Loss,acc", smear_loss, smear_acc)

    # Store the accuracy for this rotation, then print the confusion matrix
    cnn_rot_acc.append(smear_acc)
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", smear_cf[trueClass][predClass], end="")
        print()
Rotation 0.0 94/94 [==============================] - 0s 1ms/step - loss: 0.1809 - accuracy: 0.9440 FCN Results Loss,acc 0.18091194331645966 0.9440000057220459 True: 0 262 0 1 0 0 2 5 0 1 0 True: 1 0 334 2 0 0 0 2 1 1 0 True: 2 3 1 295 1 1 1 2 3 4 2 True: 3 0 0 3 298 0 8 0 5 1 1 True: 4 0 0 3 0 303 0 3 1 1 7 True: 5 1 0 1 4 2 269 1 1 3 1 True: 6 3 3 2 0 3 3 254 1 3 0 True: 7 0 3 6 3 3 0 0 285 1 5 True: 8 4 0 2 6 2 0 1 2 267 2 True: 9 2 2 1 6 11 3 0 3 2 265 94/94 [==============================] - 0s 4ms/step - loss: 0.2078 - accuracy: 0.9367 CNN Results Loss,acc 0.20780420303344727 0.9366666674613953 True: 0 266 0 0 0 0 0 5 0 0 0 True: 1 0 339 1 0 0 0 0 0 0 0 True: 2 4 2 304 0 2 0 0 1 0 0 True: 3 1 9 17 239 0 38 0 5 3 4 True: 4 0 2 1 0 313 0 1 0 0 1 True: 5 0 0 0 0 2 278 2 1 0 0 True: 6 2 3 0 0 1 2 264 0 0 0 True: 7 1 6 11 0 1 0 0 284 0 3 True: 8 10 9 7 0 4 1 1 5 247 2 True: 9 1 5 0 0 10 2 1 0 0 276 Rotation 20.0 94/94 [==============================] - 0s 1ms/step - loss: 0.8062 - accuracy: 0.8067 FCN Results Loss,acc 0.8061539530754089 0.8066666722297668 True: 0 255 0 1 1 0 10 4 0 0 0 True: 1 0 300 2 3 13 2 1 0 19 0 True: 2 4 1 235 29 4 5 7 13 13 2 True: 3 1 1 10 269 2 15 0 4 12 2 True: 4 0 0 18 3 237 10 10 5 12 23 True: 5 5 0 0 6 6 230 10 2 22 2 True: 6 3 1 2 2 11 23 221 6 3 0 True: 7 2 1 24 10 8 2 1 211 9 38 True: 8 4 2 8 16 9 7 1 2 230 7 True: 9 1 1 2 4 25 6 1 12 11 232 94/94 [==============================] - 0s 4ms/step - loss: 0.5162 - accuracy: 0.8460 CNN Results Loss,acc 0.5162220001220703 0.8460000157356262 True: 0 266 0 0 0 0 0 4 0 0 1 True: 1 0 335 2 0 0 0 0 0 3 0 True: 2 5 19 276 0 3 0 0 9 0 1 True: 3 4 7 24 200 2 50 2 5 13 9 True: 4 0 2 8 0 285 1 5 2 2 13 True: 5 4 0 0 0 3 257 12 0 1 6 True: 6 6 3 0 0 5 7 250 0 0 1 True: 7 5 30 37 0 4 0 0 210 0 20 True: 8 12 14 16 1 19 4 2 2 198 18 True: 9 1 4 2 0 17 3 1 2 4 261 Rotation 40.0 94/94 [==============================] - 0s 1ms/step - loss: 3.5865 - accuracy: 0.4073 FCN Results Loss,acc 3.5865466594696045 
0.4073333442211151 True: 0 241 0 2 0 0 14 3 5 2 4 True: 1 0 109 7 3 125 14 0 2 80 0 True: 2 3 4 70 68 33 11 27 58 32 7 True: 3 7 1 21 135 11 39 4 11 71 16 True: 4 3 0 90 6 89 15 34 6 41 34 True: 5 11 0 24 6 14 120 42 4 56 6 True: 6 10 0 2 11 43 48 143 11 4 0 True: 7 4 4 93 8 32 11 3 49 18 84 True: 8 2 0 33 35 26 21 20 0 143 6 True: 9 4 2 41 4 66 6 1 24 24 123 94/94 [==============================] - 0s 4ms/step - loss: 1.9715 - accuracy: 0.5103 CNN Results Loss,acc 1.9715429544448853 0.5103333592414856 True: 0 254 0 1 0 1 6 3 2 0 4 True: 1 0 253 19 0 33 26 2 1 6 0 True: 2 8 77 128 0 21 2 9 55 7 6 True: 3 23 11 16 55 20 98 28 3 39 23 True: 4 3 7 60 1 174 10 13 11 18 21 True: 5 28 4 3 2 20 142 58 0 10 16 True: 6 18 2 1 0 8 29 211 0 0 3 True: 7 9 47 107 0 45 1 0 51 3 43 True: 8 16 16 40 2 81 16 17 3 73 22 True: 9 8 3 23 1 30 6 17 7 10 190 Rotation 60.0 94/94 [==============================] - 0s 1ms/step - loss: 7.2198 - accuracy: 0.1780 FCN Results Loss,acc 7.219803333282471 0.17800000309944153 True: 0 230 0 9 2 4 12 7 3 2 2 True: 1 0 5 18 1 215 8 3 1 89 0 True: 2 9 0 10 31 70 43 38 44 60 8 True: 3 12 0 28 37 62 24 37 15 90 11 True: 4 2 1 147 7 64 19 35 16 22 5 True: 5 28 0 93 8 24 21 67 9 29 4 True: 6 9 0 7 17 70 71 69 13 14 2 True: 7 11 0 128 6 80 32 3 11 9 26 True: 8 13 1 87 10 54 16 49 0 54 2 True: 9 5 1 80 5 64 5 32 30 40 33 94/94 [==============================] - 0s 4ms/step - loss: 3.9787 - accuracy: 0.2323 CNN Results Loss,acc 3.978698968887329 0.23233333230018616 True: 0 241 0 3 0 0 11 6 1 1 8 True: 1 0 58 52 0 172 30 6 10 12 0 True: 2 10 53 60 0 72 5 13 57 31 12 True: 3 33 5 21 8 71 41 90 6 27 14 True: 4 2 9 115 0 65 49 27 30 18 3 True: 5 70 3 4 2 25 41 94 6 1 37 True: 6 34 1 1 1 21 57 142 0 2 13 True: 7 12 22 148 0 77 14 15 8 2 8 True: 8 26 6 80 0 92 2 54 3 15 8 True: 9 12 2 105 0 20 15 57 5 20 59 Rotation 80.0 94/94 [==============================] - 0s 1ms/step - loss: 8.7695 - accuracy: 0.1330 FCN Results Loss,acc 8.76949405670166 0.13300000131130219 
True: 0 202 0 13 3 5 4 30 5 6 3 True: 1 0 0 12 0 302 7 7 2 10 0 True: 2 5 2 25 12 92 58 35 15 63 6 True: 3 14 0 37 5 85 18 114 2 36 5 True: 4 2 0 108 18 71 25 52 13 24 5 True: 5 37 0 72 1 39 1 87 15 20 11 True: 6 6 0 36 48 48 52 50 13 19 0 True: 7 8 3 48 12 170 14 24 10 7 10 True: 8 8 0 64 2 65 9 101 1 31 5 True: 9 12 0 100 6 66 3 84 6 14 4 94/94 [==============================] - 0s 5ms/step - loss: 5.1309 - accuracy: 0.1640 CNN Results Loss,acc 5.13093376159668 0.164000004529953 True: 0 223 0 1 0 0 13 17 2 0 15 True: 1 0 2 67 0 258 8 2 1 2 0 True: 2 13 16 88 1 97 8 25 24 31 10 True: 3 35 5 23 0 83 16 126 3 20 5 True: 4 0 13 106 1 47 88 31 27 3 2 True: 5 57 3 1 0 33 8 127 5 1 48 True: 6 37 0 10 7 17 62 105 4 8 22 True: 7 12 9 100 0 103 14 57 3 4 4 True: 8 16 6 65 1 80 2 91 6 9 10 True: 9 10 0 114 0 37 25 98 1 3 7
# Plots and charts here
df_rot = pd.DataFrame({
    'Rotation': thetas,
    'FCN Accuracy': fcn_rot_acc,
    'CNN Accuracy': cnn_rot_acc,
})
display(df_rot.style)

# Accuracy vs. rotation angle, one trace per network
fig = px.line(df_rot, x='Rotation', y=['FCN Accuracy', 'CNN Accuracy'], title='Rotation Comparison, FCN vs CNN')
fig.show()
| Rotation | FCN Accuracy | CNN Accuracy | |
|---|---|---|---|
| 0 | 0.000000 | 0.944000 | 0.936667 |
| 1 | 20.000000 | 0.806667 | 0.846000 |
| 2 | 40.000000 | 0.407333 | 0.510333 |
| 3 | 60.000000 | 0.178000 | 0.232333 |
| 4 | 80.000000 | 0.133000 | 0.164000 |
Using the same basic structure as above, compare how FCNs and CNNs perform on zooms.
Use the starter code below. You will not need to randomize the zoom.
The output should be just like above for the shifts:
fcn_zoom_acc = []
cnn_zoom_acc = []
zooms = [0.25, 0.5, 1.0, 1.5, 2.0]

for zoom in zooms:
    print()
    print("zoom ", zoom)

    # Zoom every test image by the same fixed factor (no randomization needed).
    imgList = []
    for img in test_images[:]:
        imgList.append(transform_image(img, zoom=zoom))

    # Convert to an np array in the shape the networks expect
    npa_images = np.asarray(imgList, dtype=np.float32)

    # Run all of the zoomed images through the previously trained FCN
    smear_loss, smear_acc, smear_cf = getPerformance(fcn_network, npa_images, test_labels_cat, test_labels)
    print()
    print(" FCN Results")
    print(" Loss,acc", smear_loss, smear_acc)

    # Store the accuracy for this zoom, then print the confusion matrix
    fcn_zoom_acc.append(smear_acc)
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", smear_cf[trueClass][predClass], end="")
        print()

    # Run the same zoomed images through the previously trained CNN
    smear_loss, smear_acc, smear_cf = getPerformance(cnn_network, npa_images, test_labels_cat, test_labels)
    print()
    print(" CNN Results")
    print(" Loss,acc", smear_loss, smear_acc)

    # Store the accuracy for this zoom, then print the confusion matrix
    cnn_zoom_acc.append(smear_acc)
    for trueClass in range(10):
        print(" True: ", trueClass, end="")
        for predClass in range(10):
            print(" \t", smear_cf[trueClass][predClass], end="")
        print()
zoom 0.25 94/94 [==============================] - 0s 1ms/step - loss: 13.8731 - accuracy: 0.0637 FCN Results Loss,acc 13.873125076293945 0.06366666406393051 True: 0 0 1 52 36 0 96 20 44 0 22 True: 1 0 0 31 289 0 2 17 0 0 1 True: 2 0 0 61 161 0 79 0 6 1 5 True: 3 0 0 93 107 0 37 51 19 7 2 True: 4 0 0 117 70 0 115 1 14 0 1 True: 5 0 0 72 107 0 16 28 56 0 4 True: 6 0 0 61 145 0 40 6 6 13 1 True: 7 0 0 79 57 0 162 7 0 0 1 True: 8 0 0 22 240 0 19 2 3 0 0 True: 9 0 0 111 53 0 122 2 4 2 1 94/94 [==============================] - 0s 5ms/step - loss: 3.9043 - accuracy: 0.1997 CNN Results Loss,acc 3.9043478965759277 0.19966666400432587 True: 0 9 0 137 4 6 106 9 0 0 0 True: 1 11 197 0 1 72 0 25 0 34 0 True: 2 32 67 143 13 27 6 7 2 13 3 True: 3 16 1 33 5 134 39 35 45 4 4 True: 4 22 12 179 3 64 2 11 5 2 18 True: 5 6 0 81 8 64 40 20 32 1 31 True: 6 39 29 68 7 42 10 55 1 15 6 True: 7 33 65 130 5 42 10 10 1 6 4 True: 8 31 9 40 0 88 11 32 5 69 1 True: 9 24 12 141 6 73 7 4 9 3 16 zoom 0.5 94/94 [==============================] - 0s 1ms/step - loss: 13.2531 - accuracy: 0.0450 FCN Results Loss,acc 13.25312614440918 0.04500000178813934 True: 0 0 0 58 115 0 66 13 17 0 2 True: 1 2 2 55 243 0 6 24 0 7 1 True: 2 0 0 41 180 0 57 6 20 1 8 True: 3 1 0 40 64 0 29 33 45 1 103 True: 4 0 0 43 168 1 36 41 23 5 1 True: 5 1 0 24 83 0 9 85 69 1 11 True: 6 0 0 50 162 0 29 6 8 0 17 True: 7 0 0 34 158 0 29 81 4 0 0 True: 8 0 0 17 174 0 31 43 8 5 8 True: 9 0 0 19 203 0 4 61 3 2 3 94/94 [==============================] - 0s 4ms/step - loss: 3.6028 - accuracy: 0.3793 CNN Results Loss,acc 3.602781295776367 0.37933334708213806 True: 0 77 0 75 7 4 89 19 0 0 0 True: 1 1 326 0 3 6 1 1 0 2 0 True: 2 4 94 191 3 11 3 3 1 1 2 True: 3 3 7 33 13 119 78 7 22 1 33 True: 4 7 5 100 0 179 0 19 7 0 1 True: 5 7 4 31 2 22 145 25 44 0 3 True: 6 26 34 40 13 3 22 132 0 1 1 True: 7 2 34 226 2 9 8 4 21 0 0 True: 8 6 54 63 0 62 11 25 11 53 1 True: 9 9 8 121 2 125 4 7 18 0 1 zoom 1.0 94/94 [==============================] - 0s 
1ms/step - loss: 0.1809 - accuracy: 0.9440 FCN Results Loss,acc 0.18091194331645966 0.9440000057220459 True: 0 262 0 1 0 0 2 5 0 1 0 True: 1 0 334 2 0 0 0 2 1 1 0 True: 2 3 1 295 1 1 1 2 3 4 2 True: 3 0 0 3 298 0 8 0 5 1 1 True: 4 0 0 3 0 303 0 3 1 1 7 True: 5 1 0 1 4 2 269 1 1 3 1 True: 6 3 3 2 0 3 3 254 1 3 0 True: 7 0 3 6 3 3 0 0 285 1 5 True: 8 4 0 2 6 2 0 1 2 267 2 True: 9 2 2 1 6 11 3 0 3 2 265 94/94 [==============================] - 0s 4ms/step - loss: 0.2078 - accuracy: 0.9367 CNN Results Loss,acc 0.20780420303344727 0.9366666674613953 True: 0 266 0 0 0 0 0 5 0 0 0 True: 1 0 339 1 0 0 0 0 0 0 0 True: 2 4 2 304 0 2 0 0 1 0 0 True: 3 1 9 17 239 0 38 0 5 3 4 True: 4 0 2 1 0 313 0 1 0 0 1 True: 5 0 0 0 0 2 278 2 1 0 0 True: 6 2 3 0 0 1 2 264 0 0 0 True: 7 1 6 11 0 1 0 0 284 0 3 True: 8 10 9 7 0 4 1 1 5 247 2 True: 9 1 5 0 0 10 2 1 0 0 276 zoom 1.5 94/94 [==============================] - 0s 1ms/step - loss: 3.3232 - accuracy: 0.3743 FCN Results Loss,acc 3.323204755783081 0.37433332204818726 True: 0 105 0 1 0 103 0 30 31 0 1 True: 1 0 330 0 0 8 0 0 1 1 0 True: 2 0 32 131 0 53 0 1 95 0 1 True: 3 0 97 6 60 74 5 10 54 0 10 True: 4 0 1 0 0 317 0 0 0 0 0 True: 5 0 35 3 18 149 52 18 4 3 1 True: 6 0 18 0 1 244 0 9 0 0 0 True: 7 0 44 0 0 144 0 0 117 0 1 True: 8 0 76 6 5 179 0 9 1 2 8 True: 9 0 14 0 0 279 1 0 1 0 0 94/94 [==============================] - 0s 4ms/step - loss: 1.0342 - accuracy: 0.6137 CNN Results Loss,acc 1.0342309474945068 0.6136666536331177 True: 0 176 4 2 0 25 0 33 1 0 30 True: 1 0 337 1 0 2 0 0 0 0 0 True: 2 1 19 265 0 7 0 0 12 0 9 True: 3 0 64 25 52 16 72 6 24 1 56 True: 4 0 12 0 0 306 0 0 0 0 0 True: 5 0 4 1 0 23 218 21 2 0 14 True: 6 0 15 0 0 126 1 129 0 0 1 True: 7 0 68 3 0 15 0 0 220 0 0 True: 8 0 109 3 0 31 0 35 7 72 29 True: 9 0 42 0 0 182 4 0 1 0 66 zoom 2.0 94/94 [==============================] - 0s 1ms/step - loss: 6.9229 - accuracy: 0.2070 FCN Results Loss,acc 6.922941207885742 0.2070000022649765 True: 0 0 4 0 0 265 0 0 2 0 0 True: 1 0 
309 0 0 31 0 0 0 0 0 True: 2 0 35 1 0 261 0 0 16 0 0 True: 3 0 78 0 0 233 1 0 4 0 0 True: 4 0 8 0 0 310 0 0 0 0 0 True: 5 0 39 0 0 243 0 0 0 1 0 True: 6 0 36 0 0 236 0 0 0 0 0 True: 7 0 23 0 0 277 0 0 0 0 6 True: 8 0 45 0 0 241 0 0 0 0 0 True: 9 0 15 0 0 279 0 0 0 0 1 94/94 [==============================] - 0s 4ms/step - loss: 2.3023 - accuracy: 0.2990 CNN Results Loss,acc 2.30234956741333 0.29899999499320984 True: 0 0 11 0 0 139 4 103 0 0 14 True: 1 0 339 1 0 0 0 0 0 0 0 True: 2 0 72 130 0 74 1 0 15 0 21 True: 3 0 153 24 0 38 81 5 4 0 11 True: 4 0 27 0 0 291 0 0 0 0 0 True: 5 0 48 1 0 80 118 36 0 0 0 True: 6 0 30 0 0 236 0 6 0 0 0 True: 7 0 221 1 0 71 1 0 12 0 0 True: 8 0 196 2 0 69 0 17 1 1 0 True: 9 0 95 0 0 195 4 1 0 0 0
# Evaluate the CNN on the transformed test images and report its performance.
smear_loss, smear_acc, smear_cf = getPerformance(cnn_network, npa_images, test_labels_cat, test_labels)
print()
print(" CNN Results")
print(" Loss,acc", smear_loss, smear_acc)
#
# Store the accuracy for the test set in a list
cnn_zoom_acc.append(smear_acc)
#
# Print the confusion matrix, one row per true class
for true_class in range(10):
    row_cells = "".join(f" \t {smear_cf[true_class][pred_class]}" for pred_class in range(10))
    print(f" True:  {true_class}{row_cells}")
# Plots and charts: tabulate and plot FCN vs CNN accuracy across zoom factors.
df_zoom = pd.DataFrame(
    {
        'Zoom': zooms,
        'FCN Accuracy': fcn_zoom_acc,
        'CNN Accuracy': cnn_zoom_acc,
    }
)
display(df_zoom.style)
#
# ACCURACY
zoom_fig = px.line(
    df_zoom,
    x='Zoom',
    y=['FCN Accuracy', 'CNN Accuracy'],
    title='Zoom Comparison, FCN vs CNN',
)
zoom_fig.show()
| | Zoom | FCN Accuracy | CNN Accuracy |
|---|---|---|---|
| 0 | 0.250000 | 0.063667 | 0.199667 |
| 1 | 0.500000 | 0.045000 | 0.379333 |
| 2 | 1.000000 | 0.944000 | 0.936667 |
| 3 | 1.500000 | 0.374333 | 0.613667 |
| 4 | 2.000000 | 0.207000 | 0.299000 |